home *** CD-ROM | disk | FTP | other *** search
- /*
- * subst.c -- Repair substitution tables
- *
- * Copyright (C) 1997 Pretty Good Privacy, Inc.
- *
- * Written by Colin Plumb
- *
- * $Id: subst.c,v 1.2 1997/07/09 15:07:50 colin Exp $
- *
- * IT IS EXPECTED that users of this program will play with these tables
- * and the cost values in the subst.h header. (Some day, they'll all
- * get moved to an external config file.)
- *
- * NOTE: Other costs are hiding in the TabFilter function.
- * Remember to keep them all on the same scale.
- */
-
- /*
- * The repair program copies its input to its output, making various
- * substitutions, until it manages to produce a version that satisfies
- * the parser. This includes having a correct CRC for each line.
- * Each substitution has a cost, and the combinations are tried in order
- * of increasing cost. NOTE that even translating "A"->"A" counts as
- * a substitution, although it may have zero cost.
- *
- * The intention is to correct transcription errors, where the
- * errors have a distinctly non-uniform distribution. Slight
- * differences in cost produce a preference in trying some errors
- * first. If an error costs half as much as another, combinations
- * of two of that error will be compared to one of the more expensive.
- * Too many cheap substitutions will result in repair spending
- * a very long time searching before considering the more expensive
- * substitutions.
- *
- * The following parameters and the raw substitution tables are expected
- * to be edited by the user based on experience. Eventually, this
- * will be moved into an external config file, but for now it's a matter
- * of recompiling.
- */
-
- #include "subst.h"
- #include "util.h"
-
- /*
- * substSingles -- the input substitutions to make (one-to-one).
- *
- * Each row is listed in the order of correction, i.e. uncorrected input
- * first, then corrected output, followed by the cost of making that
- * substitution. Substitutions are one-way; to get two-way, list it
- * twice (the two long 22-character rows below are exactly that: the
- * same pair of strings with input and output swapped).
- *
- * The first three rows map characters to themselves at cost 0; as the
- * file header notes, even translating "A"->"A" counts as a substitution.
- *
- * NOTE(review): in every row with two strings, the strings have the
- * same length, so presumably character i of the first string is
- * corrected to character i of the second -- confirm against the code
- * that consumes this table.
- * NOTE(review): struct RawSubst is declared outside this file
- * (presumably in subst.h). Its fourth member is NULL in every row of
- * this table; substMultiples stores TabFilter there, so it looks like
- * an optional filter function -- confirm.
- */
-
- struct RawSubst const substSingles[] = {
- { " !\"#$%&'()*+,-./0123456789:;<=>?",
- " !\"#$%&'()*+,-./0123456789:;<=>?", 0, NULL }, /* identity, cost 0 */
- { "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_",
- "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_", 0, NULL }, /* identity, cost 0 */
- { "`abcdefghijklmnopqrstuvwxyz{|}~\f" TAB_STRING FORMFEED_STRING,
- "`abcdefghijklmnopqrstuvwxyz{|}~\f" TAB_STRING FORMFEED_STRING, 0, NULL }, /* identity, cost 0 */
- #if (TAB_PAD_CHAR & 128) /* Not already included? (bit 7 set, so not one of the 7-bit characters in the identity rows above) */
- { TAB_PAD_STRING, TAB_PAD_STRING, COST_LINE, NULL }, /* identity, but charged COST_LINE */
- #endif
- { "\n", "\n", COST_LINE, NULL }, /* newline maps to itself, charged COST_LINE */
- /* Common substitutions. These costs should be fiddled */
- { "-", "_", 1, NULL }, /* A *very* common error */
- { "()[]", "[]{}", 5, NULL },
- { "[]{}(){}", "()(){}[]", 10, NULL },
- { "1l!", "|||", 10, NULL },
- { "\"``,;;_g%SSOOLIIlIC27p",
- "''\".:i-9X$5o0ll11[[Z?P", 10, NULL },
- { "''\".:i-9X$5o0ll11[[Z?P",
- "\"``,;;_g%SSOOLIIlIC27p", 10, NULL }, /* reverse direction of the previous row */
- /* Guessed errors, that might happen */
- { "8B6G", "B8G6", 15, NULL },
- /*
- * Some common insertion errors.
- * NOTE(review): the output string is NULL here; presumably that means
- * the listed input characters are simply dropped -- confirm.
- */
- { ".,'`", NULL, 10, NULL },
- { NULL, NULL, 0, NULL } /* all-NULL row; presumably the end-of-table marker */
- };
-
- /*
- * substMultiples -- the many-to-many substitutions.
- *
- * Each row lists an input string, an output string, a cost and a
- * fourth member that is either NULL or TabFilter. Unlike the
- * one-to-one table, the input and output strings here may differ in
- * length (e.g. two characters in, one character out).
- *
- * NOTE(review): presumably the whole input string is replaced by the
- * whole output string -- confirm against the code that consumes this
- * table.
- * NOTE(review): the tab-stop rows all carry cost 0 together with
- * TabFilter; the file header says further costs are hiding in the
- * TabFilter function, so their effective cost is presumably decided
- * there -- confirm.
- */
- struct RawSubst const substMultiples[] = {
- { "''", "\"", 10, NULL },
- { "``", "\"", 10, NULL },
- { " ", " ", 15, NULL }, /* NOTE(review): input and output read identically here; presumably they differ in the number of spaces and whitespace was collapsed in this copy -- verify against a pristine source */
- { "NIA", "MA", 9, NULL },
- { "riM", "NM", 9, NULL },
- { "\n", " */\n", 15, NULL },
- /*
- * Tab-stop wonders.
- * NOTE(review): as shown here the following rows are textually
- * identical; presumably the originals differ in the number of padding
- * characters and runs of spaces were collapsed in this copy -- verify
- * against a pristine source before relying on them.
- */
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- { TAB_STRING" ", TAB_STRING" ", 0, TabFilter },
- #if TAB_PAD_CHAR != ' ' /* the patterns above are spelled with literal spaces, so stop the build if the pad character is anything else */
- #error Fix those tab patterns!
- #endif
- { NULL, NULL, 0, NULL } /* all-NULL row; presumably the end-of-table marker */
- };
-